﻿using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Web;
using ICSharpCode.SharpZipLib.Zip;
using SDT.CodeSystem;

using SDT.CrawlSystem.Servers.Filters;
using SDT.FileSystem;
using SDT.Libraries.Convertor;
using SDT.Libraries.Impersonator;
using SDT.ServiceSystem;
using SDT.DataService.Services;
using System.Text.RegularExpressions;
using SDT.Libraries.DuplicateDetection;

namespace SDT.CrawlSystem.Servers
{
    
    // Text clean-up helpers of the crawler: tag stripping, newline flattening
    // and conversion of non-ASCII characters to numeric character references.
    public sealed partial class WebCrawler
    {
        /// <summary>
        /// Removes everything that looks like a markup tag (any run of text from a
        /// <c>&lt;</c> up to the next <c>&gt;</c>, line breaks included) from the input.
        /// </summary>
        /// <param name="htmlString">Text that may contain markup tags.</param>
        /// <returns>
        /// The input with all tag-like spans removed; a null or empty input is
        /// returned unchanged.
        /// </returns>
        private string StripHTML(string htmlString)
        {
            if (string.IsNullOrEmpty(htmlString))
            {
                return htmlString;
            }

            // "<[^>]*>" matches exactly the same spans as the former "<(.|\n)*?>"
            // (from '<' to the first following '>', newlines included) but without
            // a capturing group and a per-character alternation, which keeps the
            // match cheap on large pages.
            return Regex.Replace(htmlString, "<[^>]*>", string.Empty);
        }

        /// <summary>
        /// Replaces every line break with a single space.
        /// </summary>
        /// <param name="value">Text that may span several lines.</param>
        /// <returns>
        /// The text with each <c>\r\n</c>, <c>\n</c> or lone <c>\r</c> replaced by
        /// one space; a null or empty input is returned unchanged.
        /// </returns>
        public static string ConvertNewlinesToSingleSpaces(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return value;
            }

            // Order matters: collapse the two-character "\r\n" first so that it
            // yields one space, then handle the remaining single-character breaks.
            return value
                .Replace("\r\n", " ")
                .Replace('\n', ' ')
                .Replace('\r', ' ');
        }

        /// <summary>
        /// Round-trips the text through ISO-8859-1 and rewrites every resulting
        /// character above code 127 as a decimal numeric character reference
        /// (<c>&amp;#NNN;</c>).
        /// </summary>
        /// <param name="textToConvert">Text to convert.</param>
        /// <returns>
        /// The converted text, in which every occurrence of <c>&amp;#147;</c> and
        /// <c>&amp;#148;</c> (whether generated here or already present in the input)
        /// is replaced by two apostrophes; a null or empty input is returned
        /// unchanged.
        /// </returns>
        public string iso8859ToUnicode(string textToConvert)
        {
            if (string.IsNullOrEmpty(textToConvert))
            {
                return textToConvert;
            }

            // Encoding to ISO-8859-1 bytes and decoding them again yields the same
            // characters as the longer bytes -> UTF-16 bytes -> chars conversion.
            // Characters that ISO-8859-1 cannot represent are substituted by the
            // encoder's fallback - NOTE(review): typically '?', confirm on the
            // target runtime.
            Encoding iso8859 = Encoding.GetEncoding("iso-8859-1");
            string latin1Text = iso8859.GetString(iso8859.GetBytes(textToConvert));

            // Reserve roughly 10% extra room for the generated references.
            StringBuilder result = new StringBuilder(latin1Text.Length + latin1Text.Length / 10);

            foreach (char c in latin1Text)
            {
                if (c > 127)
                {
                    result.Append("&#").Append((int)c).Append(';');
                }
                else
                {
                    result.Append(c);
                }
            }

            // Codes 147 and 148 are rendered as two apostrophes. The replacement is
            // done on the final string so that references already present in the
            // input are covered as well.
            // NOTE(review): presumably these are Windows-1252 curly quotes that
            // were decoded as ISO-8859-1 - confirm.
            return result
                .ToString()
                .Replace("&#147;", "''")
                .Replace("&#148;", "''");
        }

    }
}